library(tidyverse)
source('R/report_plots.R')
## functions ------
# creates yyyy-ww label for grouping data
get_date_week <- function(x){
y <- lubridate::year(x)
w <- lubridate::week(x) |> str_pad(2, 'left', 0)
str_glue("{y}-{w}")
}
# get biweekly bin date for vector of dates
get_date_biweekly <- function(x){
day_of_week <- lubridate::wday(x)
case_when(
day_of_week %in% 1:3 ~ x + 3 - day_of_week,
TRUE ~ x + 5 - day_of_week,
)
}
# lookup table for date
week_midpoint_date_lookup <- function(start = '2021-01-01', end = '2023-01-01'){
ends = lubridate::as_date(c(start, end))
tibble(
date = seq(ends[1], ends[2], by = 1),
date_week = lubridate::week(date) |> str_pad(2, 'left', '0'),
date_year = lubridate::year(date),
week = str_glue("{date_year}-{date_week}"),
) |>
group_by(week) |>
summarise(date = mean(date))
}
binom_ci <- function(x, n){
Hmisc::binconf(x, n, method = 'wilson', alpha = 0.05) |>
as_tibble() |>
janitor::clean_names()
}
get_binom_ci <- function(data){
data |>
summarise(
x = sum(pcr_positive == 'Positive'),
n = n(),
binconf = map2(x, n, ~binom_ci(.x, .y))
) |>
unnest(binconf) |>
mutate(across(c(point_est, lower, upper), ~(.x*100) |> round(1)))
}
## data ----------
swabs <-
read_rds(here('data/cube.rds')) |>
filter(str_detect(site, '^UO_'))
# data from university of ottawa
wifi <-
read_rds(here('data/uo_wifi.rds')) |>
filter(date >= min(swabs$date_swab),
date <= max(swabs$date_swab))
# lookup table for waste water site names
lookup_ww <- tribble(
~site_abbr, ~site,
'UO_AA', 'Annex Residence',
'UO_FA', 'Faculty of Social Sciences',
'UO_FT', 'Friel Residence',
'UO_NA', 'Northern sampling site',
'UO_RGN', 'Roger Guindon Hall',
'UO_ST', 'Southern sampling site',
'UO_TBT', 'Tabaret Hall'
)
# UOttawa waste water data from R. Delatolla
uottawa_ww <-
readxl::read_excel(here::here('data/ww_university.xlsx')) |>
janitor::clean_names() |>
mutate(sample_date = as_date(sample_date)) |>
filter(
sample_date > '2021-09-15',
sample_date < '2022-05-05',
!is.na(viral_copies_per_copies_pep_avg),
site != 'UO_RGN'
) |>
left_join(lookup_ww, by = c('site' = 'site_abbr')) |>
mutate(
source =
source_name |>
str_remove('^Data - uOttawa - ') |>
str_remove('.xlsx$')
) |>
transmute(site,
sample_date,
signal = viral_copies_per_copies_pep_avg)
# ottawa wastewater data: daily means
regional_ww <-
read_rds(here('data/ww_ottawa.rds')) |>
select(sample_date, starts_with('cov_')) |>
pivot_longer(contains('cov_')) |>
mutate(target = str_extract(name, 'cov_n.'),
stat = str_extract(name, 'mean|sd'),
) |>
select(-name) |>
pivot_wider(names_from = stat, values_from = value) |>
mutate(.lower = mean - sd, .upper = mean + sd)
# important dates for context
event_dates <-
tribble(
~event, ~start, ~end,
'Reading Week', '2021-10-24', '2021-10-30',
'Holiday Break', '2021-12-22', '2022-01-04',
'Closure', '2022-01-04', '2022-01-31',
'Closure', '2022-02-16', '2022-02-21',
'Reading Week', '2022-02-20', '2022-02-26',
) |>
mutate(across(start:end, as_date))
# UOttawa cases data
cases <-
read_rds(here('data/cases_rule_imputed.rds')) |>
select(case, role, case_date, UC:TBT) |>
mutate(biweek = get_date_biweekly(case_date)) |>
nest(associated_sites = UC:TBT)
This plot shows the counts of positive (red) and negative (yellow) samples collected at each facility over time (x-axis). Samples that could not be tested are shown in navy. Only flocked swabs are shown. (Other sponge swabs were collected on 2022-04-28 were for comparing flocked and sponge swabs.)
plot_timeseries_summary(
swabs |>
filter(
swab_type %in% 'flock',
!negative_control,
!is.na(negative_control)
),
height_svg = 3.3,
width_svg = 8
)
This plot shows the counts of positive and negative samples collected at each facility over time.
plot_timeseries_locations(
swabs |>
filter(swab_type == 'flock'),
height_svg = 6,
width_svg = 8
)
This section contains results from modeling SARS-CoV-2 cases at UO using swab-PCR results as a predictor.
We created a random intercepts logistic regression model with the occurrence of cases (binary) as outcome and swab results for the previous week (the proportion of positive swabs) as predictor. The model has a random intercept for each site.
Our model formula is
cases ~ positives[lag 1 week] + (1|site).
The model is fit as follows:
swab_model <-
blme::bglmer(
cases_binary ~ detection_lag_1week + (1 | site),
data = uo_sites,
family = binomial
)
swab_model <- read_rds(here('model/uo_swab_model.rds'))
These plots show the swab results, cases, and predictions by the
current model.
# uo cases data - long table
cases_long <-
read_rds(here('./data/latest_uo_cases.rds')) |>
filter(!is.na(positive_result),
positive_result > lubridate::as_date('2021-09-14')) |>
select(case, role, confirmed_by_test, positive_result, c(`90U`:TBT)) |>
mutate(
`Campus Total` = TRUE,
# 3 days transmissibility
transmission_start = positive_result - days(3)
) |>
pivot_longer(`90U`:`Campus Total`, names_to = 'site') |>
filter(value) |>
select(-value) |>
mutate(site = fct_relevel(site, "Campus Total", after = 0L))
uo_pred <- read_rds(here('model/uo_pred.rds'))
# create a 3-panel fig for each of 6 buildings.
map(
# site names for filtering
.x = swabs$site |> unique() |> str_remove('UO_'),
.f = ~plot_uo_pred_swab_case(
swabs = swabs,
cases = cases_long,
preds = uo_pred,
site_select = .x
)
) |> wrap_plots(guides = 'collect', ncol = 1)
These tables show the model coefficients and statistics.
broom.mixed::glance(swab_model) |>
mutate_if(is.numeric, round, 2) |>
kableExtra::kable(caption = 'Model statistics',
format = "html") |>
kableExtra::kable_styling(bootstrap_options = "striped",
full_width = F, position = "left")
| nobs | sigma | logLik | AIC | BIC | deviance | df.residual |
|---|---|---|---|---|---|---|
| 84 | 1 | -24.52 | 55.04 | 62.33 | 42.83 | 81 |
broom.mixed::tidy(swab_model) |>
mutate(p.value = as.character(p.value)) |>
mutate_if(is.numeric, round, 3) |>
mutate(p.value = as.numeric(p.value)) |>
kableExtra::kable(caption = 'Model parameters',
format = "html") |>
kableExtra::kable_styling(bootstrap_options = "striped",
full_width = F, position = "left")
| effect | group | term | estimate | std.error | statistic | p.value |
|---|---|---|---|---|---|---|
| fixed | NA | (Intercept) | -3.301 | 0.804 | -4.107 | 0.0000401 |
| fixed | NA | detection_lag1w | 7.114 | 2.112 | 3.369 | 0.0007549 |
| ran_pars | site | sd__(Intercept) | 1.067 | NA | NA | NA |
# tidy(swab_model, effects = 'ran_vals')
sjPlot::tab_model(
swab_model,
show.re.var = TRUE,
pred.labels = c("(Intercept)", "Swabs @ t-1week"),
dv.labels = "Relation between UO cases (y/n) and proportion\n of positive swabs the previous week"
)
| Relation between UO cases (y/n) and proportion of positive swabs the previous week | |||
|---|---|---|---|
| Predictors | Odds Ratios | CI | p |
| (Intercept) | 0.04 | 0.01 – 0.18 | <0.001 |
| Swabs @ t-1week | 1229.31 | 19.59 – 77128.28 | 0.001 |
| Random Effects | |||
| σ2 | 3.29 | ||
| τ00 site | 1.14 | ||
| ICC | 0.26 | ||
| N site | 6 | ||
| Observations | 84 | ||
| Marginal R2 / Conditional R2 | 0.378 / 0.538 | ||
This plot shows the modelling data as points (detection lagged 1 week and cases at a given site- y/n), as well as how the probability of future cases varies by the previous weeks detection level and site, according to our model (curves).
read_rds(here('fig/plt_model_resp_curves.rds')) +
labs(subtitle = 'Points show case/swab data and curves show model probability values')
This plot shows the odds ratios for the intercepts of the model (an intercept for each site).
# random effect plot: site intercepts; axes are flipped
sjPlot::plot_model(swab_model, type = 're') +
theme_light() +
labs(title = "Random effects", y = 'Odds Ratio')
This panel shows the cases counts at UO over the course of our sampling period. The case data shown represents the days on which a positive test was reported (black rug lines) and the presumed start of transmissiblity for each case (red lines).
# generated from file R/plot_UO_case_load.R
read_rds(here('fig/plt_cases_swabs.rds'))
This panel shows linked data from swab results, CO2
readings, and wifi traffic (number of users @ peak, daily) during our
study period. Unfortunately, we do not have wifi data for 90U.
uo_multivariate <- make_uo_panel(swabs, wifi)
write_rds(uo_multivariate, here('fig/uo_multivariate.rds'))
uo_multivariate & theme(text = element_text(size = 13))
rm(uo_multivariate)
plot_UO_wifi_ts(wifi, swabs)
This panel shows a time-series of the daily peak number of wifi users
at UO facilities. Sampling days are highlighted in blue.
regional_ww |>
group_by(sample_date) |>
summarise(mean = mean(mean)) |>
ggplot() +
geom_rect(data = tibble(xmin = min(swabs$date_swab),
xmax = max(swabs$date_swab),
ymin = -Inf,
ymax = Inf),
aes(xmin = xmin, xmax = xmax, ymin = ymin, ymax = ymax),
fill = 'grey90', color = 'gray',
size = 0.5,
lty = 2, alpha = 0.5) +
geom_text(aes(as_date('2022-01-01'), 0.0035, label = 'UOttawa study'),
color = 'darkgray') +
geom_point(aes(sample_date, mean),
alpha = 0.76, size = 0.85, shape = 1) +
geom_smooth(aes(sample_date, mean),
method = 'loess', formula = 'y ~ x',
span = 0.18, size = 0.75, alpha = 0.25,
fill = 'blue') +
scale_shape(solid = F) +
labs(x = 'Date',
y = 'Mean SARS-CoV-2 / PPMoV ',
color = 'PCR Target',
fill = 'PCR Target',
shape = 'PCR Target',
subtitle = 'Ottawa Waste-water: Combined N1 and N2 targets')
2022-02-28 Update: This data uses weekly aggregates, the new results use biweekly aggregates (in case you are wondering why the r values aren’t the same).
We probably want to summarise our swabs at the weekly (or biweekly?) level for comparison to the waste-water signal. This is what the weekly campus-wide time-series looks like compared with our weekly campus-wide swab positivity:
ww_weekly_summary <-
uottawa_ww |>
mutate(week = get_date_week(sample_date)) |>
left_join(week_midpoint_date_lookup('2021-01-01', '2022-12-31'),
by = 'week') |>
select(date, signal, site) |>
group_by(date) |>
summarise(signal_mean = mean(signal),
signal_sd = sd(signal))
plt_ww_weekly <- ww_weekly_summary |>
ggplot(aes(date,
signal_mean,
# ymin = signal_mean - signal_sd,
# ymax = signal_sd + signal_sd
)) +
geom_line() +
geom_point() +
geom_point(data = uottawa_ww,
aes(as_date(sample_date), signal),
size = 1, shape = 1, alpha = 0.5) +
labs(x = 'Date', y = 'SARS-CoV-2 / PMMoV',
title = 'Weekly campus-wide average SARS-CoV-2 waste-water detection')
swabs_weekly_summary <- swabs |>
mutate(week = get_date_week(date_swab)) |>
left_join(week_midpoint_date_lookup('2021-01-01', '2022-12-31'),
by = 'week') |>
group_by(date) |>
summarise(
x = sum(pcr_positive == 'Positive'),
n = n(),
positivity = list(binom_ci(x, n))
) |>
unnest_wider(positivity)
plt_swabs_weekly <- swabs_weekly_summary |>
ggplot(aes(x = date, y = point_est*100)) +
geom_line() +
geom_point() +
scale_x_date() +
scale_y_continuous(limits = c(0,100)) +
labs(x = 'Date', y = 'Swab positivity (%)',
title = 'Weekly campus-wide surface swab detection rate')
wrap_plots(plt_swabs_weekly, plt_ww_weekly, ncol = 1)
If we look at the relationship between swabs and waste-water on weeks where both results are available, we see that they have a positive correlation.
campus_ww_swab_summary <-
swabs_weekly_summary |>
select(date, point_est) |>
left_join(ww_weekly_summary, by = 'date') |>
transmute(date, ww = signal_mean, swabs = point_est*100)
campus_ww_swab_summary |>
ggplot(aes(swabs, ww, color = date)) +
geom_smooth(method = 'lm', color = 'gray', fill = 'gray') +
geom_point() +
scale_color_date() +
coord_fixed(ratio = 18000) +
labs(x = 'Swab positivity (%)',
y = 'Waste-water signal',
color = "Sample\nWeek")
cortest_swab_ww <-
lst(
pearson = cor.test(
campus_ww_swab_summary$ww,
campus_ww_swab_summary$swabs,
method = 'pearson'
),
pearson_est = pearson$estimate |> round(2),
pearson_lower = pearson$conf.int[1] |> round(2),
pearson_upper = pearson$conf.int[2] |> round(2),
spearman = cor.test(
campus_ww_swab_summary$ww,
campus_ww_swab_summary$swabs,
method = 'spearman'
),
spearman_est = spearman$estimate |> round(3),
# spearman_lower = spearman$conf.int[1] |> round(2),
# spearman_upper = spearman$conf.int[2] |> round(2),
kendall = cor.test(
campus_ww_swab_summary$ww,
campus_ww_swab_summary$swabs,
method = 'kendall'
),
kendall_est = kendall$estimate |> round(2),
# kendall_lower = kendall$conf.int[1] |> round(2),
# kendall_upper = kendall$conf.int[2] |> round(2),
) |>
suppressWarnings() |>
glue_data(
"The Pearson's *r* (linear correlation) is {pearson_est} ({pearson_lower}-{pearson_upper}).
The Spearman's *r* (rank correlation) is {spearman_est}..
The Kendall's *Tau* is {kendall_est}."
)
Pearson’s r is more affected by outliers. Spearman’s r or Kendall’s Tau are probably the more reliable measures of correlation between these variables.
(Data doesn’t appear normally distributed, btw)
campus_ww_swab_summary |>
pivot_longer(cols = c(ww, swabs)) |>
ggplot(aes(value)) +
geom_histogram(bins = 15, color = 'lightgray') +
facet_wrap(~name, scales = 'free')
I’ve removed the off-campus site (Robert Guidon Hall) and retained the other 6 near-campus sites (raw data below).
uottawa_ww |>
ggplot(aes(sample_date, signal)) +
geom_path(alpha = 0.6) +
geom_point(alpha = 0.5, size = 0.5) +
labs(x = 'Date', y = 'SARS-CoV-2 / PPMoV') +
facet_wrap(~site, ncol = 2)